import pandas as pd
import os
# Work from this script's own directory so the relative CSV path below
# resolves no matter where the script is launched from.
script_dir = os.path.dirname(os.path.abspath(__file__))
os.chdir(script_dir)
# Load the data
df = pd.read_csv('NotClean_EVUsage_Data.csv')
# Duplicate check: number of rows that are exact copies of an earlier row.
# `sum` has to be *called*; printing the bare attribute only shows the
# bound-method repr instead of the count.
print(df.duplicated().sum())
print('*'*50)
# Find and print the duplication rate of every column: how many non-null
# values repeat an earlier value in the same column, relative to the number
# of non-null values in that column.
for column in df.columns:
    series = df[column]
    total_count = series.count()  # total number of non-null values
    # duplicated() flags every repeat of an earlier value; count() then
    # ignores nulls, so repeated missing values are not counted.
    duplicate_count = series[series.duplicated()].count()

    if duplicate_count > 0:
        # Compute the rate only here: duplicate_count > 0 implies
        # total_count > 0, so a column with no non-null values never
        # triggers a 0/0 division (NaN plus a RuntimeWarning).
        duplication_rate = duplicate_count / total_count
        print(f"Column '{column}' has {duplicate_count} duplicates out of {total_count} values.")
        print(f"The duplication rate is: {duplication_rate:.2%}")
    else:
        print(f"Column '{column}' has no duplicate values. Duplication rate is 0%.")